clear all
* Set the working directory to the folder that holds the data before running.

* Sample for which tested literacy and newspaper readership are available.
use master_data_women_men

* Keep only records that carry a survey code.
keep if v000!=""

* Rebuild the sampling weight from v005 and declare the survey design.
drop weight
generate weight = v005/1000000
svyset [pweight=weight], psu(v021)

* Rural indicator: 1 when v025 is 2, 0 when v025 is 1, missing otherwise.
generate rural = cond(v025==2, 1, cond(v025==1, 0, .))

* Literacy indicator: 1 when v155 is 2, 0 when v155 is 0 or 1, missing otherwise.
generate litr = cond(v155==2, 1, cond(inlist(v155, 0, 1), 0, .))
tabulate litr

* Respondent identifier and the number of identified respondents per country.
egen personid = group(v000 v001 v002 v003 v025)
egen N_bycountry = count(personid) if !missing(personid), by(countryname)

* Numeric country code; list any survey codes left without one.
encode countryname, generate(ctryid)
tabulate v000 if missing(ctryid)

* Hand-coded classification for countries set case by case.
replace Script_N = 1 if inlist(countryname, "Albania", "Maldives", "Moldova", "Yemen", "Myanmar", "Ukraine")
replace Script_N = 0 if inlist(countryname, "Sao Tome and Principe", "Timor-Leste")

replace LP_only_indig = 1 if inlist(countryname, "Albania", "Moldova", "Myanmar", "Ukraine", "Yemen")
replace LP_only_indig = 0 if countryname=="Maldives"

replace LP_also_indig = 0 if inlist(countryname, "Sao Tome and Principe", "Timor-Leste")
replace LP_also_indig = 1 if countryname=="Maldives"

* Descriptive checks by script indicator.
bysort Script_N: tabulate countryname
bysort Script_N: tabulate litr
bysort Script_N: summarize v133, detail

* Four-way language-policy category. LP_also_indig==0 takes precedence over
* every other rule; the remaining rules are mutually exclusive.
generate language_policy = cond(LP_also_indig==0, 4, ///
    cond(LP_only_indig==1, 1, ///
    cond(LP_only_indig==0 & LP_also_indig==1 & Script_N==1, 2, ///
    cond(LP_only_indig==0 & LP_also_indig==1 & Script_N==0, 3, .))))

label define state11 1 "Indigenous" 2 "Equal Roles " 3 "Colonial Favored" 4 "Colonial"
label values language_policy state11
tabulate language_policy
bysort language_policy: tabulate countryname

* Weighted literacy summary within each category.
bysort language_policy: summarize litr [w=weight]



* Continent code built from region plus named exceptions. Later assignments
* override earlier ones, so the statement order is kept.
generate continent = .
replace continent = 1 if region=="ssa" | inlist(countryname, "Egypt", "Sao Tome and Principe")
replace continent = 2 if inlist(region, "sa", "eap") ///
    | inlist(countryname, "Maldives", "Yemen", "Jordan", "Timor-Leste", "Azerbaijan") ///
    | inlist(countryname, "Armenia", "Turkey", "Kyrgyz Republic", "Myanmar", "Tajikistan")
replace continent = 3 if region=="lac"
replace continent = 4 if inlist(countryname, "Albania", "Moldova", "Ukraine")

label define state331 1 "Africa" 2 "Asia" 3 "America" 4 "Europe"
label values continent state331
tabulate continent
bysort continent: tabulate countryname

* One indicator per continent (cont1, cont2, ...).
tabulate continent, generate(cont)

* Cluster identifier from survey, v024, v025 and v001.
egen cluster = group(v000 v024 v025 v001)


* Newspaper readership indicators from v157; codes of 9 and above stay missing.
* new_rd: 1 when v157 is 2 or more, 0 when v157 is 0 or 1.
gen new_rd=.
replace new_rd=1 if v157>=2 & v157<9
replace new_rd=0 if v157<=1
tab new_rd

* new_rds: 1 when v157 is 1 or more, 0 when v157 is below 1.
gen new_rds=.
replace new_rds=1 if v157>=1 & v157<9
replace new_rds=0 if v157<1
tab new_rds


* Years of schooling: copy v133 and blank out values of 90 or more.
gen yrs_schl_all=.
replace yrs_schl_all= v133
replace yrs_schl_all= . if v133>=90 & yrs_schl_all~=.
label variable yrs_schl_all "Years of Schooling - Full sample"
tab yrs_schl_all

* One indicator per distinct schooling value (eduyrs1, eduyrs2, ...).
tab yrs_schl_all, gen(eduyrs)

* NOTE(review): this assumes the tabulate above created eduyrs10 through
* eduyrs25; which schooling values those map to depends on the data - confirm.
egen eduyrsgr10=rowmax(eduyrs10-eduyrs25)

* Schooling top-coded at 10 years.
* Stata orders missing above every number, so yrs_schl_all>=10 is also true for
* missing values. Exclude them explicitly; otherwise respondents with no
* schooling information would be coded as having 10 years.
gen edu_yrs=yrs_schl_all if yrs_schl_all<=9
replace edu_yrs=10 if yrs_schl_all>=10 & !missing(yrs_schl_all)
tab edu_yrs


* Write the country list of each language-policy category to a log file.
preserve
    keep if v000!=""
    log using countries_by_lang_policy, replace
    bysort language_policy: tab countryname
    log close
restore

* Clustering identifier used for standard errors: survey x v001.
egen clustera = group(v000 v001)

* Remove records whose country is the literal string ".".
sort country
drop if country=="."
sort country

* Map the two out-of-range survey years onto the values used for binning.
* NOTE(review): 1394 and 2073 look like non-Gregorian calendar years - confirm.
recode v007 (1394=2015) (2073=2016)

* Five-year survey-period bins; years outside 2006-2020 remain missing.
generate yearsurvey = cond(inrange(v007, 2006, 2010), 1, ///
    cond(inrange(v007, 2011, 2015), 2, ///
    cond(inrange(v007, 2016, 2020), 3, .)))

bysort yearsurvey: tabulate country


******Figure 1: Official language choice, human capital, political engagement and assets - Panels A and C
***********Literacy and newspaper readership: averages and confidence intervals by language policy

* Panel A. Weighted regression of the literacy indicator on the language-policy
* categories, absorbing survey-period (yearsurvey) fixed effects, with standard
* errors clustered on clustera.
reghdfe litr i.language_policy  [pw=weight], absorb(yearsurvey) cluster(clustera)

* Post the margins for each category so that coefplot plots them as estimates.
margins i.language_policy, post


* One bar per language-policy category with capped confidence intervals and the
* value printed next to each bar. The graph is saved as litrbylangpolicy.gph
* for the combined figure.
coefplot (., keep(1.language_policy)) ///
(., keep(2.language_policy)) ///
(., keep(3.language_policy)) ///
 (., keep(4.language_policy)) ///
, vertical  nooffsets recast(bar) barwidth(0.5) fcolor(*1.51) ///
citop ciopts(recast(rcap))  mlabel mlabcolor(black) ///
coeflabels(, notick labgap(2)) plotregion(margin(b=0)) ylabel(, nogrid) bgcolor(white) graphregion(color(white)) ///
format(%9.2f) addplot(scatter @b @at, ms(i) mlabel(@b) mlabpos(2) mlabcolor(black)) ///
xlabel(, labsize(zero) angle(30)) title("Panel A", size(small) color (black)) ylabel(0(1)1) yscale(range(0(0.1)1))  ytitle("Proportion Literate", ///
size(normalsize) color(black))  ///
legend(order (1 "Indigenous" 3 "Equal Roles " 5 "Colonial Favored" 7 "Colonial") rows(2) size(small)) saving(litrbylangpolicy, replace)



* Panel C. Same specification as the literacy panel, with the newspaper
* readership indicator new_rds as the outcome: weighted, survey-period fixed
* effects absorbed, standard errors clustered on clustera.
reghdfe new_rds i.language_policy  [pw=weight], absorb(yearsurvey) cluster(clustera)


* Post the margins for each category so that coefplot plots them as estimates.
margins i.language_policy, post


* One bar per language-policy category with capped confidence intervals; the
* graph is saved as news1bylangpolicy.gph for the combined figure.
coefplot (., keep(1.language_policy)) ///
(., keep(2.language_policy)) ///
(., keep(3.language_policy)) ///
 (., keep(4.language_policy)) ///
, vertical  nooffsets recast(bar) barwidth(0.5) fcolor(*1.51) ///
citop ciopts(recast(rcap))  mlabel mlabcolor(black) ///
coeflabels(, notick labgap(2)) plotregion(margin(b=0)) ylabel(, nogrid) bgcolor(white) graphregion(color(white)) ///
format(%9.2f) addplot(scatter @b @at, ms(i) mlabel(@b) mlabpos(2) mlabcolor(black)) ///
xlabel(, labsize(zero) angle(30)) title("Panel C", size(small) color (black)) ylabel(0(0.5)0.5) yscale(range(0(0.1)0.5))  ytitle("Proportion Reading Newspaper", ///
size(normalsize) color(black))  ///
legend(order (1 "Indigenous" 3 "Equal Roles " 5 "Colonial Favored" 7 "Colonial") rows(1) size(small)) saving(news1bylangpolicy, replace)



******Table A1: Association between official language choice and human capital, political engagement and assets - COLUMNS (1) - (4)

* Weighted means of both outcomes in the reference category (language_policy==1).
* summarize does not accept pweights and stops with an error, so analytic
* weights are used instead; the weighted mean is the same.
sum litr   new_rds if language_policy==1 [aweight=weight] 

* Column (1): literacy on language policy, absorbing v012, Female and
* survey-period fixed effects; standard errors clustered on clustera.
* This call starts outcomesLP.tex afresh (replace); later calls append columns.
reghdfe litr i.language_policy [pw=weight] , absorb(v012 Female  yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex,  stats(coef se) auto(2) level(90)  label paren(se)   replace 

* Column (2): adds the continent indicators cont1 and cont2.
reghdfe litr i.language_policy cont1 cont2  [pw=weight], absorb(v012 Female  yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex,  stats(coef se) auto(2) level(90) label  paren(se)   append 


* Column (3): newspaper readership on language policy, same fixed effects.
reghdfe new_rds i.language_policy  [pw=weight], absorb(v012 Female  yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex,  stats(coef se) auto(2) level(90) label paren(se)   append 

* Column (4): adds the continent indicators cont1 and cont2.
reghdfe new_rds i.language_policy cont1 cont2 [pw=weight], absorb(v012 Female  yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex,  stats(coef se) auto(2) level(90) label paren(se)   append 


********************************************************************************************************************************
********************************************************************************************************************************

* Second sample: countries for which years of schooling and assets are available.
* Set the working directory to the folder that holds the data before running.
clear all
use master_data_women_menv133

* Keep only records that carry a survey code.
keep if v000!=""

generate weight = v005/1000000

tabulate countryname

* Hand-coded classification for countries set case by case.
replace Script_N = 1 if inlist(countryname, "Albania", "Maldives", "Moldova", "Myanmar", "Ukraine", "Yemen")
replace Script_N = 0 if inlist(countryname, "Sao Tome and Principe", "Timor-Leste")

replace LP_only_indig = 1 if inlist(countryname, "Albania", "Moldova", "Myanmar", "Ukraine", "Yemen")
replace LP_only_indig = 0 if countryname=="Maldives"

replace LP_also_indig = 0 if inlist(countryname, "Sao Tome and Principe", "Timor-Leste")
replace LP_also_indig = 1 if countryname=="Maldives"

* Descriptive checks by script indicator.
bysort Script_N: tabulate countryname
bysort Script_N: summarize v133, detail

* Four-way language-policy category. LP_also_indig==0 takes precedence over
* every other rule; the remaining rules are mutually exclusive.
generate language_policy = cond(LP_also_indig==0, 4, ///
    cond(LP_only_indig==1, 1, ///
    cond(LP_only_indig==0 & LP_also_indig==1 & Script_N==1, 2, ///
    cond(LP_only_indig==0 & LP_also_indig==1 & Script_N==0, 3, .))))

label define state11 1 "Indigenous" 2 "Equal Roles" 3 "Colonial Favored" 4 "Colonial"
label values language_policy state11
tabulate language_policy

* Record the sample and its classification in sample.smcl.
log using sample, replace
*THIS IS THE SAMPLE OF COUNTRIES FOR WHICH DHS DATA IS AVAILABLE
tab countryname if language_policy!=.
*THIS IS THE CLASSIFICATION OF COUNTRIES INTO LANGUAGE POLICY CATEGORIES FOR WHICH DHS DATA IS AVAILABLE
bysort language_policy: tab countryname
log close




* Continent code built from region plus named exceptions. Later assignments
* override earlier ones.
gen continent=.
replace continent=1 if (region=="ssa"|countryname=="Egypt"|countryname=="Sao Tome and Principe"|countryname=="Morocco")
replace continent=2 if (region=="sa"|region=="eap"|countryname=="Maldives"|countryname=="Yemen"|countryname=="Jordan"|countryname=="Timor-Leste"|countryname=="Azerbaijan"|countryname=="Armenia"|countryname=="Turkey"  ///
|countryname=="Kyrgyz Republic"|countryname=="Myanmar"|countryname=="Tajikistan"|countryname=="Kazakhstan"|countryname=="Uzbekistan")
replace continent=3 if region=="lac"
replace continent=4 if (countryname=="Albania"|countryname=="Moldova"|countryname=="Ukraine")
label define state331 1 "Africa" 2 "Asia" 3 "America" 4 "Europe"
* Attach the label defined on the previous line. The earlier name (state321)
* was never defined, so continent showed raw codes instead of names.
label values continent state331
tab continent
bysort continent: tab countryname

* One indicator per continent (cont1, cont2, ...), used as regression controls.
tab continent, gen(cont)

* Household identifier: survey x v001 x v002.
egen hhid = group(v000 v001 v002)


* Toilet indicator: 0 when v116 is 31, 1 for any other v116 below 90,
* missing otherwise.
generate toilet = cond(v116==31, 0, cond(v116<90, 1, .))
tabulate toilet

* Clustering identifier used for standard errors: survey x v001.
egen clustera = group(v000 v001)

* Map out-of-range survey years onto the values used for binning.
* NOTE(review): 1394 and 2073 look like non-Gregorian calendar years and 2
* like a truncated 2002 - confirm.
recode v007 (2=2002) (1394=2015) (2073=2016)

* Survey-period bins: 0 before 2006, then five-year bins up to 2020.
generate yearsurvey = cond(v007<2006, 0, ///
    cond(inrange(v007, 2006, 2010), 1, ///
    cond(inrange(v007, 2011, 2015), 2, ///
    cond(inrange(v007, 2016, 2020), 3, .))))

bysort yearsurvey: tabulate country




********************************************************


****Figure 1: Official language choice, human capital, political engagement and assets - Panels B and D
****Years of schooling and asset ownership

* Panel B. Weighted regression of years of schooling (v133, restricted to
* values below 90) on the language-policy categories, absorbing survey-period
* fixed effects, with standard errors clustered on clustera.
reghdfe v133 i.language_policy  if v133<90  [pw=weight], absorb( yearsurvey) cluster(clustera)


* Post the margins for each category so that coefplot plots them as estimates.
margins i.language_policy, post


* One bar per language-policy category with capped confidence intervals; the
* graph is saved as educbylangpolicy.gph, which also supplies the legend of
* the combined figure.
coefplot (., keep(1.language_policy)) ///
(., keep(2.language_policy)) ///
(., keep(3.language_policy)) ///
 (., keep(4.language_policy)) ///
, vertical  nooffsets recast(bar) barwidth(0.5) fcolor(*1.51) ///
citop ciopts(recast(rcap))  mlabel mlabcolor(black) ///
coeflabels(, notick labgap(2)) plotregion(margin(b=0)) ylabel(, nogrid) bgcolor(white) graphregion(color(white)) ///
format(%9.2f) addplot(scatter @b @at, ms(i) mlabel(@b) mlabpos(2) mlabcolor(black)) ///
xlabel(, labsize(zero) angle(30)) title("Panel B", size(small) color (black)) ylabel(0(10)10) yscale(range(0(1)10))  ytitle("Avg. Years of Schooling", ///
size(normalsize) color(black))  ///
legend(order (1 "Indigenous" 3 "Equal Roles" 5 "Colonial Favored" 7 "Colonial") rows(1) size(small))  saving(educbylangpolicy, replace) 



* Panel D. Household-level asset index. Everything between preserve and restore
* works on a temporary household-level copy of the data.
preserve
* Drop respondents with any asset item above 1. Stata orders missing above
* every number, so records with a missing asset item are dropped as well.
drop if v119>1
drop if v120>1
drop if v121>1
drop if v122>1
drop if v123>1
drop if v124>1
drop if v125>1

* Share of the eight items (seven asset items plus toilet) that equal 1;
* rowmean ignores missing components.
egen mean_assets = rowmean(v119 v120 v121 v122 v123 v124 v125 toilet)
sum mean_assets

* Store every variable label in a local macro so it can be put back after
* collapse, which overwrites variable labels.
 foreach v of var * { 
	local l`v' : variable label `v' 
} 

 
* Collapse to one record per household (hhid): weighted means of the listed
* variables and the first non-missing country name.
collapse (mean) mean_assets toilet v119-v125 language_policy yearsurvey v025 clustera (firstnm) countryname [pw=weight], by(hhid)

* Restore the stored variable labels on the variables that survived collapse.
foreach v of var * { 
	label var `v' "`l`v''" 
}
label define state129 1 "Indigenous" 2 "Equal Roles" 3 "Colonial Favored" 4"Colonial" 
label values language_policy state129



* Unweighted household-level regression of the asset index on language policy,
* absorbing survey-period fixed effects, clustered on clustera.
reghdfe mean_assets i.language_policy, absorb(yearsurvey) cluster(clustera)


* Post the margins for each category so that coefplot plots them as estimates.
margins i.language_policy, post


* One bar per language-policy category with capped confidence intervals; the
* graph is saved as assetsbylangpolicy.gph for the combined figure.
coefplot (., keep(1.language_policy)) ///
(., keep(2.language_policy)) ///
(., keep(3.language_policy)) ///
 (., keep(4.language_policy)) ///
, vertical  nooffsets recast(bar) barwidth(0.5) fcolor(*1.51) ///
citop ciopts(recast(rcap))  mlabel mlabcolor(black) ///
coeflabels(, notick labgap(2)) plotregion(margin(b=0)) ylabel(, nogrid) bgcolor(white) graphregion(color(white)) ///
format(%9.2f) addplot(scatter @b @at, ms(i) mlabel(@b) mlabpos(2) mlabcolor(black)) ///
xlabel(, labsize(zero) angle(30)) title("Panel D", size(small) color (black)) ylabel(0(0.6)0.6) yscale(range(0(0.1)0.6))  ytitle("Proportion of 8 HH assets", ///
size(normalsize) color(black))  ///
legend(order (1 "Indigenous" 3 "Equal Roles" 5 "Colonial Favored" 7 "Colonial") rows(1) size(small)) saving(assetsbylangpolicy, replace)



* Return to the respondent-level data.
restore


* Figure 1: combine the four saved panels into one graph that shares the
* legend taken from the schooling panel, then export it as a PDF.
grc1leg litrbylangpolicy.gph ///
    educbylangpolicy.gph ///
    news1bylangpolicy.gph ///
    assetsbylangpolicy.gph, ///
    legendfrom(educbylangpolicy.gph) ///
    graphregion(color(white)) ///
    saving(overview_langpolicy_outcomes.gph, replace)
graph export overview_langpolicy_outcomes.pdf, replace


*****Table A1: Association between official language choice and human capital, political engagement and assets - COLUMNS (5)-(8)
* (Columns (1)-(4) were written from the first sample; the schooling columns
* below are appended to the same outcomesLP.tex.)

* Weighted mean of years of schooling in the reference category
* (language_policy==1). Restricted to v133<90 so that it is computed on the
* same observations as the regressions below; without the restriction the
* codes of 90 and above would inflate the mean.
sum  v133   if language_policy==1 & v133<90 [w=weight] 


* Column (5): years of schooling on language policy, absorbing v012, Female and
* survey-period fixed effects; standard errors clustered on clustera.
reghdfe v133 i.language_policy if v133<90 [pw=weight], absorb(v012 Female  yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex,  stats(coef se) auto(2) level(90) label  paren(se)   append 

* Column (6): adds the continent indicators cont1 and cont2.
reghdfe v133 i.language_policy cont1 cont2 if v133<90  [pw=weight], absorb(v012 Female  yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex,  stats(coef se) auto(2) level(90)  label paren(se)   append 


* Household-level asset regressions for Table A1, run on a temporary
* household-level copy of the data.
preserve

* Drop respondents with any asset item above 1 (missing items compare as
* larger than 1 and are dropped too).
foreach item in v119 v120 v121 v122 v123 v124 v125 {
	drop if `item'>1
}

* Share of the eight items (seven asset items plus toilet) that equal 1.
egen mean_assets = rowmean(v119 v120 v121 v122 v123 v124 v125 toilet)

* Remember every variable label, because collapse overwrites them.
foreach x of varlist * {
	local l`x' : variable label `x'
}

* One record per household: weighted means plus the first non-missing country name.
collapse (mean) mean_assets toilet v119-v125  language_policy yearsurvey v025 clustera cont1 cont2 cont3 cont4 ///
	(firstnm) countryname [pw=weight], by(hhid)

* Put the remembered labels back on the surviving variables.
foreach x of varlist * {
	label variable `x' "`l`x''"
}
label define state129 1 "Indigenous" 2 "Equal Roles" 3 "Colonial Favored" 4 "Colonial"
label values language_policy state129

* Reference-category mean of the asset index.
summarize mean_assets if language_policy==1

* Asset index on language policy with survey-period fixed effects.
reghdfe mean_assets i.language_policy, absorb(yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex, stats(coef se) auto(2) level(90) label paren(se) append

* Same specification with the continent indicators cont1 and cont2 added.
reghdfe mean_assets i.language_policy cont1 cont2, absorb(yearsurvey) cluster(clustera)
outreg2 using outcomesLP.tex, stats(coef se) auto(2) level(90) label paren(se) append

restore


